Main Code

Main Code

import pandas as pd
import plotly.express as px

# 1. Load Data
# Path to the CSV dataset (relative to the current working directory).
data_file = '../datasets/population_area.csv'

# Read the CSV into a pandas DataFrame. read_csv's default quotechar ('"')
# already copes with commas inside quoted fields such as country names.
df = pd.read_csv(data_file)

# Clean up column names (remove leading/trailing spaces) so the lookups of
# 'Country', 'Indicator', 'Value' and 'year' below are not broken by padding.
df.columns = df.columns.str.strip()

# 2. Choose Countries and Indicators
# Sorted list of every country/region present in the data; not printed here,
# kept available for inspection when picking names for countries_input.
available_countries = sorted(df['Country'].unique())

countries_input = 'United States of America,Canada,Japan,Germany,India,Indonesia,Brazil,South Africa,Niger'
indicators_inputs = ['Population aged 0 to 14 years old (percentage)','Sex ratio (males per 100 females)']

# Parse the comma-separated string into a list of names, dropping blanks.
selected_countries = [name.strip() for name in countries_input.split(',') if name.strip()]

if not selected_countries:
    print("No country names entered. Exiting.")
else:
    # 3. Filter Data: keep rows matching both a selected country and a
    # selected indicator. .copy() gives an independent frame that is safe to
    # modify without SettingWithCopyWarning.
    row_mask = df['Country'].isin(selected_countries) & df['Indicator'].isin(indicators_inputs)
    df_filtered = df[row_mask].copy()

    # 4. Handle No/Partial Data
    found_countries = df_filtered['Country'].unique()
    # Sorted so the warning text is deterministic (set order is arbitrary).
    not_found = sorted(set(selected_countries) - set(found_countries))

    # Test the frame itself for emptiness rather than relying on the
    # truthiness of an array of strings.
    if df_filtered.empty:
        print(f"No data found for any of the specified countries: {', '.join(selected_countries)}")
    else:
        if not_found:
            print(f"\nWarning: No data found for the following requested countries: {', '.join(not_found)}")
            print(f"Plotting data for: {', '.join(found_countries)}")
        else:
            print(f"\nFound data for: {', '.join(found_countries)}")

        # 5. Data Cleaning
        # Cast to str first: the .str accessor raises AttributeError when
        # read_csv has already parsed 'Value' as a numeric column. Thousands
        # separators are then stripped and anything non-numeric becomes NaN.
        value_text = df_filtered['Value'].astype(str).str.replace(',', '', regex=False)
        df_filtered['Value'] = pd.to_numeric(value_text, errors='coerce')
        df_filtered = df_filtered.dropna(subset=['Value'])

        # Sort by country and year so each line is drawn in chronological order.
        df_filtered = df_filtered.sort_values(['Country', 'year'])

        # Check again: cleaning may have removed every remaining row.
        if df_filtered.empty:
            print("No valid numeric indicator data found for the selected countries after cleaning.")
        else:
            # 6. Plot Data
            print("\nGenerating plot for selected countries...")

            # Plotting strategy: colour identifies the country, while line
            # dash and marker symbol identify the indicator.
            title_countries = ', '.join(found_countries)
            if len(title_countries) > 60:  # Truncate the title if too long
                title_countries = title_countries[:57] + "..."

            fig = px.line(
                df_filtered,
                x='year',
                y='Value',
                color='Country',          # Different color for each country
                line_dash='Indicator',    # Different line style for each indicator
                symbol='Indicator',       # Different marker shape for each indicator
                markers=True,             # Show markers on the lines
                title=f'Indicators for {title_countries} over Time',
                hover_data=['Indicator'], # Show indicator name on hover
            )

            # Axis and legend labels.
            fig.update_layout(
                xaxis_title='Year',
                yaxis_title='Indicator Value',
                legend_title='Legend',    # Generic legend title
            )

            # 7. Show Plot
            fig.show()
Found data for: Brazil, Canada, Germany, India, Indonesia, Japan, Niger, South Africa, United States of America

Generating plot for selected countries...